/*
PROGRAM BY: Teresa Fort
Last edited 04/23/2020

This program makes an industry-hs code dataset 

The data contain a list of all potentia hs10 codes that are used as inputs by plants in this industry

For non-census years, can only do industry-hs mapping
for census years, can do both firm and industry-hs mapping

*/ 

%include 'yyyy/pdata.sas';

options obs=max;
    

libname ecroot 'xxxx/data';
%let input=xxxx/input ;
%let output=xxxx/data ;

*1) bring in the plant-mat code dataset; 
***************************************;

%macro make_hs_inputs(y);
%if &y. >= 1992 and &y. < 1997 %then %do;
data cmfmat&y.;
   length sic $ 4 ;
    set ecroot.cmf_mat_match1992;
    sic=substr(m,1,4);
    count=1;
    if fk_naics12~=" ";
run;



*A.  ****MAKE INDUSTRY BY HS MAPPING****;

*2. collapse to level at which i can merge;
***************************************;
proc sort data=cmfmat&y.; by fk_naics12 sic; run;

proc means sum noprint data=cmfmat&y.;
   by fk_naics12 sic;
   var mc;
   output out=mat_conc1(keep=sic fk_naics12 mc) sum()= ;
run;

data bridge(drop=naics);
    set  ecroot.mat_code_bridge_all;
    if year = &y.;
run;
***************************************;

*3. joinby using the concordance i made;
***************************************;
*left join on naics;

 proc sql ;
  create table mat_hs2 as 
  select *
  from Mat_conc1 left join bridge 
    on Mat_conc1.sic=bridge.sic ;
 quit;
***************************************; 


 
*4. collapse to get all HS codes by estab naics;
***************************************;
data mat_hs3; 
   set mat_hs2;
   if match_level=4 then mlevel_4=1;
   if match_level=3 then mlevel_3=1;
run;

proc sort data=mat_hs3; by fk_naics12 hs; run;

proc means sum noprint data=mat_hs3;
    by fk_naics12 hs ;
    var mlevel:; 
    output out=mat_hs4(drop= _type_) sum()= ;
run;    
    
data ecroot.sic_out_hs_in&y.(drop=_freq_);
    set mat_hs4;
    label fk_naics12="Plant industry based on FK NAICS 12" ;
    if hs~=. ;
    year = &y.;
run;
  *note: mlevel variables are the sum of the records for which this relationship mapped at that level;
***************************************;


*B.  ****MAKE FIRM-HS MAPPING****;

*2. collapse to level at which i can merge;
***************************************;
* only do firm-hs mapping for year 1992;
%if &y.=1992 %then %do;
proc sort data=cmfmat&y.; by firmid sic; run;

proc means sum noprint data=cmfmat&y.;
   by firmid sic;
   var mc count;
   output out=mat_firm1(keep=firmid sic mc count) sum()= ;
run;
***************************************;

*3. joinby using the concordance i made;
***************************************;
*left join on naics;

 proc sql ;
  create table firm_hs2 as 
  select *
  from mat_firm1 left join bridge 
    on  mat_firm1.sic=bridge.sic ;
 quit;
***************************************; 



*4. collapse to get all HS codes by firmid;
***************************************;
data firm_hs3; 
   set firm_hs2;
   if match_level=4 then mlevel_4=1;
   if match_level=3 then mlevel_3=1;
run;

proc sort data=firm_hs3; by firmid hs; run;

proc means sum noprint data=firm_hs3;
    by firmid hs ;
    var mlevel:; 
    output out=firm_hs4(drop= _type_) sum()= ;
run;    
    
data ecroot.firm_hs_inputs&y.(drop=_freq_);
   set firm_hs4;
   label hs="HS codes that map from naics material trailers of firm" ;
   if hs~=. ;
    year = &y.;
run;
%end;
%end;
    
%if &y. >= 1997 %then %do;
%if &y. >= 1997 & &y. < 2002 %then %do;
data cmfmat&y.;
   length naics $ 6 ;
    set ecroot.cmf_mat_match1997;
    naics=substr(mnaics,1,6);
    count=1;
    if fk_naics12~=" ";
run;
%end;

%if &y. >= 2002 & &y. < 2007 %then %do;
data cmfmat&y.;
   length naics $ 6 ;
    set ecroot.cmf_mat_match2002;
    naics=substr(mnaics,1,6);
    count=1;
    if fk_naics12~=" ";
run;
%end;

%if &y. >= 2007 & &y. < 2012 %then %do;
data cmfmat&y.;
   length naics $ 6 ;
    set ecroot.cmf_mat_match2007;
    naics=substr(mnaics,1,6);
    count=1;
    if fk_naics12~=" ";
run;
%end;

%if &y. >= 2012 & &y. < 2017 %then %do;
data cmfmat&y.;
   length naics $ 6 ;
    set ecroot.cmf_mat_match2012;
    naics=substr(mnaics,1,6);
    count=1;
    if fk_naics12~=" ";
run;
%end;
*A.  ****MAKE INDUSTRY BY HS MAPPING****;

*2. collapse to level at which i can merge;
***************************************;
proc sort data=cmfmat&y.; by fk_naics12 naics; run;

proc means sum noprint data=cmfmat&y.;
   by fk_naics12 naics;
   var mc;
   output out=mat_conc1(keep=naics fk_naics12 mc) sum()= ;
run;

data bridge;
    set  ecroot.mat_code_bridge_all ;
    if year = &y.;
run;
***************************************;

*3. joinby using the concordance i made;
***************************************;
*left join on naics;

 proc sql ;
  create table mat_hs2 as 
  select *
  from Mat_conc1 left join bridge 
    on Mat_conc1.naics=bridge.naics ;
 quit;
***************************************; 


 
*4. collapse to get all HS codes by estab naics;
***************************************;
data mat_hs3; 
   set mat_hs2;
   if match_level=6 then mlevel_6=1;
   if match_level=5 then mlevel_5=1;
   if match_level=4 then mlevel_4=1;
   if match_level=3 then mlevel_3=1;
run;

proc sort data=mat_hs3; by fk_naics12 hs; run;

proc means sum noprint data=mat_hs3;
    by fk_naics12 hs ;
    var mlevel:; 
    output out=mat_hs4(drop= _type_) sum()= ;
run;    
    
data ecroot.naics_out_hs_in&y.(drop=_freq_);
   set mat_hs4;
   label fk_naics12="Plant industry based on FK NAICS 12" ;
   if hs~=. ;
    year = &y.;
run;
  *note: mlevel variables are the sum of the records for which this relationship mapped at that level;
***************************************;


*B.  ****MAKE FIRM-HS MAPPING****;

*2. collapse to level at which i can merge;
***************************************;
* only do the following for census years;
%if &y.=1997 or &y.=2002 or &y.=2007 or &y.=2012 %then %do;
proc sort data=cmfmat&y.; by firmid naics; run;

proc means sum noprint data=cmfmat&y.;
   by firmid naics;
   var mc count;
   output out=mat_firm1(keep=firmid naics mc count) sum()= ;
run;

***************************************;

*3. joinby using the concordance i made;
***************************************;
*left join on naics;

 proc sql ;
  create table firm_hs2 as 
  select *
  from mat_firm1 left join bridge 
    on  mat_firm1.naics=bridge.naics ;
 quit;
***************************************; 



*4. collapse to get all HS codes by firmid;
***************************************;
data firm_hs3; 
   set firm_hs2;
   if match_level=6 then mlevel_6=1;
   if match_level=5 then mlevel_5=1;
   if match_level=4 then mlevel_4=1;
   if match_level=3 then mlevel_3=1;
run;

proc sort data=firm_hs3; by firmid hs; run;

proc means sum noprint data=firm_hs3;
    by firmid hs ;
    var mlevel:; 
    output out=firm_hs4(drop= _type_) sum()= ;
run;    
    
data ecroot.firm_hs_inputs&y.(drop=_freq_);
   set firm_hs4;
   label hs="HS codes that map from naics material trailers of firm" ;
   if hs~=. ;
   year=&y.;
run;
%end;
%end;
%mend;
  *note: mlevel variables are the sum of the records for which this relationship mapped at that level;
***************************************;
/*
*do this for all years;
%macro loop1();
    %do y=1992 %to 2016 %by 1;
        %make_hs_inputs(&y.);
    %end;
%mend;
%loop1();
*/

*do this 2007;
%macro loop1();
    %do y=2007 %to 2007 %by 1;
        %make_hs_inputs(&y.);
    %end;
%mend;
%loop1();
    
    
/*
  proc export data = ecroot.firm_hs_inputs
    file = "&output/check_mnaics02.dta"
    dbms=stata replace;
run;  
*/
